/*******************************************************************************																	

	DESCRIPTION: 	This do-file creates diagnostic plots for the tuning of the
					ML models and writes a LaTeX table with the optimal
					hyper-parameters.
	
*******************************************************************************/

* Start from a clean session. -clear all- does not drop global macros, so the
* path globals ${data} and ${output} set by the caller are still available.
clear all

* Numeric prefix used in the name of every file exported by this do-file.
global id_code 142

* Fail early with a readable message when a path global used below has not
* been set, instead of stopping later on a cryptic "file not found" error.
foreach path_global in data output {
	if `"${`path_global'}"' == "" {
		display as error "global macro `path_global' is not defined; set it before running this do-file"
		exit 198
	}
}

********************** 1. TIME SERIES ******************************************

* Load the yearly tuning results of each model into a frame named after the
* model (rf, boost, lasso), stacking the years 1992-2016 with a year variable.
foreach pred in rf boost lasso {

	* Scratch frame in which a yearly CSV is read before it is appended
	tempname stage
	frame create `stage'

	* (Re)create the frame that collects all years of this model
	capture frame drop `pred'
	frame create `pred'
	frame change `pred'

	forvalues year = 1992/2016 {

		if "`pred'" != "rf" | `year' != 1992 {
			* Read the CSV in the scratch frame, store it as a temporary
			* file, and stack it onto the data held in the model frame
			frame `stage' {
				import delimited "${data}/102_`pred'grid_Full_emplAft6M_0M_In_`year'.csv", clear
				generate year = `year'
				tempfile yearfile
				save `yearfile', replace
			}
			append using `yearfile'
		}
		else {
			* First year of the first model: read straight into the model frame
			import delimited "${data}/102_`pred'grid_Full_emplAft6M_0M_In_`year'.csv", clear
			generate year = `year'
		}

	}

	frame change default

}


* Random forest: yearly path of the two tuned hyper-parameters
frame change rf

	* Options shared by both random-forest panels
	local rf_opts xline(2006, lcolor(gray) lpattern(dash)) xtitle("") ///
		xlabel(1992(4)2016, labsize(small)) ///
		legend(cols(1) size(small) symxsize(*0.5)) ///
		graphregion(color(white))

	* Panel scatt1: variables per node (mtry)
	twoway (connected mtry year, color(ebblue)), ///
		name(scatt1, replace) ///
		ytitle("Variables per node (mtry)") ///
		ylabel(10(10)50, angle(0) format(%5.0f)) ///
		`rf_opts'

	* Panel scatt2: minimum node size (min.node.size)
	twoway (connected minnodesize year, color(ebblue)), ///
		name(scatt2, replace) ///
		ytitle("Minimum node size (min.node.size)") ///
		ylabel(1(2)9 12(2)14, angle(0) format(%5.0f)) ///
		`rf_opts'

frame change default

* Gradient boosting: yearly path of the tuned learning rate
frame change boost

	* Tick positions and labels of the log-scaled y-axis (the tick at 1 keeps
	* its default label)
	local eta_ticks `"0.0001 "0.0001" 0.001 "0.001" 0.01 "0.01" 0.1 "0.1" 1"'

	* Panel scatt3. The second plot is a single invisible point at y = 0.0001,
	* x = 1992 -- presumably there to stretch the y-axis down to 0.0001.
	twoway ///
		(connected eta year, color(ebblue)) ///
		(scatteri 0.0001 1992, msymbol(none)) ///
		, ///
		name(scatt3, replace) graphregion(color(white)) ///
		legend(off) ///
		xtitle("") xlabel(1992(4)2016, labsize(small)) ///
		xline(2006, lcolor(gray) lpattern(dash)) ///
		ytitle("Learning rate (eta)") yscale(log) ///
		ylabel(`eta_ticks', angle(0))

frame change default

* Lasso: yearly path of the tuned penalty
frame change lasso

	* Tick positions and labels of the log-scaled y-axis
	local lambda_ticks `"0.0001 "0.0001" 0.001 "0.001" 0.01 "0.01" 0.1 "0.1""'

	* Panel scatt4. The second plot is a single invisible point at y = 0.0001,
	* x = 1992 -- presumably there to stretch the y-axis down to 0.0001.
	twoway ///
		(connected lambda year, color(ebblue)) ///
		(scatteri 0.0001 1992, msymbol(none)) ///
		, ///
		name(scatt4, replace) graphregion(color(white)) ///
		legend(off) ///
		xtitle("") xlabel(1992(4)2016, labsize(small)) ///
		xline(2006, lcolor(gray) lpattern(dash)) ///
		ytitle("Penalty (lambda)") yscale(log) ///
		ylabel(`lambda_ticks', angle(0))

frame change default

* Assemble the four time-series panels on a common x-axis and export the
* combined figure (the most recently drawn graph) as a PDF.
local ts_panels scatt1 scatt2 scatt3 scatt4
graph combine `ts_panels', xcommon graphregion(color(white)) name(ts_all, replace)

graph export "${output}/${id_code}_ML_Models_Tuning_Time_Series.pdf", replace


********************** 2. CV OUTPUT ******************************************

* Specification whose grid-search results are shown in the CV figure; these
* locals build the input file names and the name of the exported PDF.
local model   "Full"
local year    "2006"
local outcome "emplAft6M_0M_In"

* Random forest: cross-validated AUC over the (mtry, minnodesize) grid.
import delimited "${data}/102_rfgrid_search_`model'_`outcome'_`year'.csv", clear

* Minimum node size of the grid point with the highest AUC: after sorting on
* descending roc it sits in the first row. It is the only value the plot
* needs (vertical reference line), so no other "best" values are stored here.
gsort -roc
local minnodesize_star = minnodesize[1]

* Order the grid so that each connected line runs left to right in minnodesize
sort mtry minnodesize

* Panel cv_rf: one AUC curve per mtry value (10, 20, 30, 40, 50), restricted
* to grid points with roc above 0.65; darker shades are larger mtry values.
twoway (connected roc minnodesize if mtry == 10 & roc > 0.65, color(ebblue*0.4)) ///
	(connected roc minnodesize if mtry == 20 & roc > 0.65, color(ebblue*0.7)) ///
	(connected roc minnodesize if mtry == 30 & roc > 0.65, color(ebblue)) ///
	(connected roc minnodesize if mtry == 40 & roc > 0.65, color(ebblue*1.3)) ///
	(connected roc minnodesize if mtry == 50 & roc > 0.65, color(ebblue*1.7)) ///
	, ///
	subtitle("R. Forest") ///
	xline(`minnodesize_star', lpattern(dash) lcolor(orange_red)) ///
	xtitle("Minimum node size (minnodesize)") ///
	ytitle(AUC) ylabel(0.65(0.025)0.725, format(%5.3f) angle(0)) ///
	legend(order(1 "10" 2 "20" 3 "30" 4 "40" 5 "50") ///
		subtitle("Variables per node (mtry)", size(small)) ///
		symxsize(*0.5) size(small) cols(2) colfirst pos(5) ring(0)) ///
	graphregion(color(white)) name(cv_rf, replace)
		
	
* Gradient boosting: cross-validated AUC over the learning-rate grid.
import delimited "${data}/102_boostgrid_search_`model'_`outcome'_`year'.csv", clear

* Learning rate of the grid point with the highest AUC (first row once the
* data are sorted on descending roc), then restore ascending eta for the line.
gsort -roc
local eta_star = eta[1]
sort eta

* Tick positions and labels of the log-scaled x-axis (the tick at 1 keeps
* its default label)
local eta_ticks `"0.0001 "0.0001" 0.001 "0.001" 0.01 "0.01" 0.1 "0.1" 1"'

* Panel cv_boost: AUC against eta for grid points with roc above 0.65; the
* dashed vertical line marks the best learning rate.
twoway (connected roc eta if roc > 0.65, color(ebblue)), ///
	name(cv_boost, replace) graphregion(color(white)) ///
	subtitle("B. Gradient") ///
	xtitle("Learning rate (eta)") xscale(log) xlabel(`eta_ticks') ///
	xline(`eta_star', lpattern(dash) lcolor(orange_red)) ///
	ytitle(AUC) ylabel(0.65(0.025)0.725, format(%5.3f) angle(0))
		
	
* Lasso: cross-validated AUC over the penalty grid.
import delimited "${data}/102_lassogrid_search_`model'_`outcome'_`year'.csv", clear

* Penalty and AUC of the best grid point (first row once the data are sorted
* on descending roc), then restore ascending lambda for the connected line.
gsort -roc
local max_auc = roc[1]
local lambda_star = lambda[1]
sort lambda

* Tick positions and labels of the log-scaled x-axis
local lambda_ticks `"0.0001 "0.0001" 0.001 "0.001" 0.01 "0.01" 0.1 "0.1""'

/* Disabled annotation of the optimum (uses max_auc); to re-enable, add this
   option to the twoway command below:
   text(`=`max_auc'+0.01' `= `lambda_star' + 0.001' "{&lambda} = `: di %5.4f `lambda_star''", color(orange_red) placement(e))
*/

* Panel cv_lasso: AUC against lambda for grid points with roc above 0.65; the
* dashed vertical line marks the best penalty.
twoway (connected roc lambda if roc > 0.65, color(ebblue)), ///
	name(cv_lasso, replace) graphregion(color(white)) ///
	subtitle("Lasso") ///
	xtitle("Penalty (lambda)") xscale(log) xlabel(`lambda_ticks') ///
	xline(`lambda_star', lpattern(dash) lcolor(orange_red)) ///
	ytitle(AUC) ylabel(0.65(0.025)0.725, format(%5.3f) angle(0))
	
* Arrange the three CV panels on a common y-axis, leaving the second cell of
* the grid empty (holes(2)), and export the combined figure as a PDF.
local cv_panels cv_rf cv_boost cv_lasso
graph combine `cv_panels', ycommon holes(2) graphregion(color(white)) name(cv_`model', replace)

graph export "${output}/${id_code}_ML_Models_Tuning_CV_results_`model'_`year'.pdf", replace


********************** 3. LATEX TABLE FOR 2006 ******************************************

* Read the hyper-parameter values reported in the table from the first row of
* each model's tuning file for the chosen outcome and year.
local year    2006
local outcome emplAft6M_0M_In
local suffix  "Full_`outcome'_`year'.csv"

* Random forest: mtry and minimum node size
import delimited "${data}/102_rfgrid_`suffix'", clear
local mtry_star        = mtry[1]
local minnodesize_star = minnodesize[1]

* Gradient boosting: learning rate
import delimited "${data}/102_boostgrid_`suffix'", clear
local eta_star = eta[1]

* Lasso: penalty
import delimited "${data}/102_lassogrid_`suffix'", clear
local lambda_star = lambda[1]

* Write the LaTeX table of optimal hyper-parameters. Only the tabular
* environment is active; the document and table wrappers are written as
* LaTeX comments (leading %).
file open myfile using "${output}/${id_code}_ML_Models_Tuning_`year'.tex", write replace

* Commented-out document preamble
file write myfile "%\documentclass{article}" _newline
file write myfile "%\usepackage{booktabs}" _newline
file write myfile "%\usepackage[margin=1in]{geometry}" _newline
file write myfile "%\begin{document}" _newline
file write myfile "%\begin{table}[h] \centering" _newline

* Start of the tabular environment and column headers
file write myfile "\footnotesize \begin{tabular}{l c c}" _newline
file write myfile "\hline \hline \addlinespace[1.5ex]" _newline
file write myfile "Model & Parameter name & Optimal value \\ \addlinespace[0.1cm]" _newline
file write myfile " \hline \addlinespace[1.5ex]" _newline

* One row per hyper-parameter
file write myfile "R. Forest & mtry & `: di %5.0f `mtry_star'' \\ \addlinespace[0.2cm]" _newline
file write myfile "R. Forest & min.node.size & `: di %5.0f `minnodesize_star'' \\ \addlinespace[0.2cm]" _newline
file write myfile "B. Gradient & eta & `: di %6.4f `eta_star'' \\ \addlinespace[0.2cm]" _newline
file write myfile "Lasso & lambda & `: di %6.4f `lambda_star'' \\ \addlinespace[0.2cm]" _newline

* Bottom rule and end of the tabular environment
file write myfile "\addlinespace[0ex]" _newline
file write myfile "\hline \hline \addlinespace[1.5ex]" _newline
file write myfile "\end{tabular}" _newline

* Commented-out closing of the table and document (no trailing newline)
file write myfile "%\end{table}" _newline
file write myfile "%\end{document}"

file close myfile

